import pymysql
import matplotlib.pyplot as plt

HOST = "localhost"
USER = "root"
PASSWORD = "123456"
DATABASE = "51job"
PORT = 3306,
CHARSET = "utf8"
TABLE_NAME = "51job_table"
SAVEFILE_PATH = "../data/"
list_x = []
list_y = []

connection = pymysql.Connect(host=HOST, user=USER, password=PASSWORD, database=DATABASE, port=3306,
                      charset=CHARSET)
cur = connection.cursor()

def matplot_chinese():
    plt.rcParams['font.sans-serif'] = ['SimHei']  # 显示中文标签
    plt.rcParams['axes.unicode_minus'] = False


HEADER = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': '_uab_collina=164083188927394862921656; partner=www_baidu_com; privacy=1640831873; guid=3ec9968fb69da6c4c985b99a92fb1e6c; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; acw_tc=2f624a5116408410489218571e1f29773fdfcbc25d684039598ac10fdef5aa; acw_sc__v2=61cd3f8c7ff55fd2001524e1730e634059bd99cd; acw_sc__v2=61cd3f8c7ff55fd2001524e1730e634059bd99cd; search=jobarea%7E%60000000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C8%CB%B9%A4%D6%C7%C4%DC%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21recentSearch1%7E%60000000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%B4%F3%CA%FD%BE%DD%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21collapse_expansion%7E%600%7C%21; arp_scroll_position=94; ssxmod_itna=Qq0x0Dci0=YmuRDl4iqYKEP7qciUFhDR7rpO7Qmx0vP+eGzDAxn40iDtorT1Yo8ie4NdF=Wppx8nj8hK46WRWw0aKjFr4GLDmKDyKYb4xoD445GwD0eG+DD4DWiqB6gDtqDkD3qDdUx7PDch5DLxi7zgIPDB+Yd8iCDQPDy2PDIxD1r3whDiIzDYvkDimzD75HDQIzbKYD+xTAuxi32ODDBh43970Aj=Mj/rgp8Rgv+9oACO8AnG0wNtM3n1yzUxibzpS+2r1zS=MNWj+YIQhYmhiWFiGq1AmohQGxdEroIGtPqjDiCeXeYr4KDDiOxqGxRjDxD=; ssxmod_itna2=Qq0x0Dci0=YmuRDl4iqYKEP7qciUFhDR7rpO7QDnI8YhKDsNeDLehiL=G7Y4qnRGCnjjDYP8YyDWA0pVjiuDrq52PnIAx0FiMvq+2+/Te=456DUe=edUA2BWAeEDTzn19obLGXEBPqEEA0xdBxKh/roeGEQPoSQ52EYeZfIpmmYwDqfRAlfi4qK=jWT7fxfVQ3WAY1E=lbeN=+m7P+eerp4mmPIcGSbsjDar/kdqZ3=YZPoAIl7WaKy1+1bK5Y28dx2Id9WrS43vtqh2lUBKkUddlOG0OHWria=8r1Z7KAOd/Mj4bt3qhYgCE/=IdrL2B3Kg6si3nEj56=mPrK23pCI0=d7n=+C676je=U=SPzaPureVBPHP5a83gBm7Y5=t5jlm2DiFbKr158YNZQvhgdE0I7WIEGdwbedYnVGXhboxOfi6RT=mD4dcCIZ2KEtpooKX3f+0a36IoOmcOfo6jUBE3zWTp9pnP7Fa=feTgfl9TTomVBrCtb/2ntFExvnnnAmo9PscK5uKvTD4ggYNdA4DQFoKDMlaTTbNAqvP9mx1OF0by8/CpMb4PiiX4cfobLhwvTbgA1=C5xn8Wz=DZ5qUZ/YiODH=nGYGTglPDZ02Hi=GDDFqD+aPqAx5G+lm+975mMleBC=YIPYD',
    'Host': 'search.51job.com',
    'Referer': 'https://search.51job.com/list/000000,000000,0000,00,9,99,%E5%A4%A7%E6%95%B0%E6%8D%AE,2,1.html?',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Microsoft Edge";v="96"',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62',
}


def format_url(page):
    # URL_STYLE = "https://search.51job.com/list/000000,000000,0000,26,9,99,+,2,{}.html".format(page)
    URL_STYLE = ["https://search.51job.com/list/000000,000000,0000,00,9,99,大数据,2,{}.html?".format(page),
                 "https://search.51job.com/list/000000,000000,0000,00,9,99,人工智能,2,{}.html?".format(page),
                ]
    return URL_STYLE


def format_write(i):
    format_string = "" \
                    "{job_name}\t{company_name}\t{providesalary_text}\t{workarea_text}\t" \
                    "{updatedate}\t{companytype_text}\t{degreefrom}\t{workyear}\t" \
                    "{issuedate}\t{jobwelf}\t{attribute_text}\t" \
                    "{companysize_text}\t{companyind_text}{newline}" \
        .format(
        job_name=i["job_name"], company_name=i["company_name"],
        providesalary_text=i["providesalary_text"],
        workarea_text=i["workarea_text"], updatedate=i["updatedate"],
        companytype_text=i["companytype_text"],
        degreefrom=i["degreefrom"], workyear=i["workyear"],
        issuedate=i["issuedate"], jobwelf=i["jobwelf"],
        attribute_text=" ".join(i["attribute_text"]),
        companysize_text=i["companysize_text"],
        companyind_text="not_found",
        newline="\n")
    return format_string


CREATE_TABLE_51JOB = """
            create table if not exists 51job_table(
                  job_name varchar(100), company_name varchar(100), 
                  providesalary_text varchar(100), workarea_text varchar(100), 
                  updatedate varchar(100), companytype_text varchar(100), 
                  degreefrom varchar(50), workyear varchar(50), 
                  issuedate varchar(50), jobwelf varchar(100), 
                  attribute_text varchar(100), companysize_text varchar(50), 
                  companyind_text varchar(50)
                  )
"""

CLEAR_TABLE = """
truncate table 51job_table
"""

COMPANYTYPE_TEXT = """
            select substring_index(companytype_text, '-', 1) companytype, count(*) companytype_count
            from 51job_table a
            group by companytype
            order by companytype_count desc
"""

HEAD_TOP_COMPANY = """
        select distinct company_name, count(company_name) company_count 
            from 51job_table 
            group by company_name 
            order by company_count desc 
            limit 10;
"""

JOBWELF_COUNT = """
            select count(*) from 51job_table where jobwelf != ''
"""

WORKAREA_SORT = """
        select substring_index(workarea_text, '-', 1) workarea_province, count(*) workarea_count
            from 51job_table a
            group by workarea_province
            order by workarea_count desc
            limit 20;
"""


def format_insert(i):
    INSERT_VALUES = "" \
                    "('{job_name}', '{company_name}', '{providesalary_text}', '{workarea_text}', " \
                    "'{updatedate}', '{companytype_text}', '{degreefrom}', '{workyear}', " \
                    "'{issuedate}', '{jobwelf}', '{attribute_text}', " \
                    "'{companysize_text}', '{companyind_text}') " \
                    "".format(
        job_name=i[0].strip("\\"), company_name=i[1].replace("\'", ""),
        providesalary_text=i[2],
        workarea_text=i[3], updatedate=i[4],
        companytype_text=i[5],
        degreefrom=i[6], workyear=i[7],
        issuedate=i[8], jobwelf=i[9],
        attribute_text=" ".join(i[10]),
        companysize_text=i[11],
        companyind_text=i[12].strip()
    )
    return INSERT_VALUES


DEGREEFROM_WORKYEAR_COMPANYSIZE_TEXT = """
select degreefrom, workyear, companysize_text
from 51job_table
where degreefrom !="" and workyear != "" and companysize_text != "" and companytype_text != "";
"""

ANALYSIS_COMPANYTYPE_TEXT = """
select companytype_text
from 51job_table
where degreefrom !="" and workyear != "" and companysize_text != "" and companytype_text != "";
"""

########################################################################################################################
# new
SALARY_WORKTIME="""
select providesalary_text,workarea_text from 51job_table limit 10;
"""